#!/usr/bin/perl
# SPDX-License-Identifier: BSD-2-Clause
# Copyright 1996-2025 The NASM Authors - All Rights Reserved


# Read the source-form of the NASM manual and generate the various
# output forms.

# TODO:
#
# Ellipsis support would be nice.

# Source-form features:
# ---------------------
#
# Bullet \b
#   Bullets the paragraph. Rest of paragraph is indented to cope. In
#   HTML, consecutive groups of bulleted paragraphs become unordered
#   lists.
#
# Indent \>
#   Indents the paragraph equivalently to a bulleted paragraph.  In HTML,
#   an indented paragraph following a bulleted paragraph is included in the
#   same list item.
#
# Blockquote \q
#   Marks the paragraph as a block quote.
#
# Emphasis \e{foobar}
#   produces `_foobar_' in text and italics in HTML, PS, RTF
#
# Inline code \c{foobar}
#   produces ``foobar'' in text, and fixed-pitch font in HTML, PS, RTF
#
# Display code
# \c  line one
# \c   line two
#   produces fixed-pitch font where appropriate, and doesn't break
#   pages except sufficiently far into the middle of a display.
#
# Chapter, header and subheader
# \C{intro} Introduction
# \H{whatsnasm} What is NASM?
# \S{free} NASM Is Free
#   dealt with as appropriate. Chapters begin on new sides, possibly
#   even new _pages_. (Sub)?headers are good places to begin new
#   pages. Just _after_ a (sub)?header isn't.
#   The keywords can be substituted with \K and \k.
#
# Keyword \K{cintro} \k{cintro}
#   Expands to `Chapter 1', `Section 1.1', `Section 1.1.1'. \K has an
#   initial capital whereas \k doesn't. In HTML, will produce
#   hyperlinks.
#
# Web link \W{http://foobar/}{text} or \W{mailto:me@here}\c{me@here}
#   the \W prefix is ignored except in HTML; in HTML the last part
#   becomes a hyperlink to the first part.
#
# Web URL \w{http://foobar/}
#   equivalent to \W{http://foobar/}\c{http://foobar/}.
#
# Literals \{ \} \\
#   In case it's necessary, they expand to the real versions.
#
# Nonbreaking hyphen \-
#   Need more be said?
#
# Source comment \#
#   Causes everything after it on the line to be ignored by the
#   source-form processor.
#
# Indexable word \i{foobar} (or \i\e{foobar} or \i\c{foobar}, equally)
#   makes word appear in index, referenced to that point
#   \i\c comes up in code style even in the index; \i\e doesn't come
#   up in emphasised style.
#
# Indexable non-displayed word \I{foobar} or \I\c{foobar}
#   just as \i{foobar} except that nothing is displayed for it
#
# Index rewrite
# \IR{foobar} \c{foobar} operator, uses of
#   tidies up the appearance in the index of something the \i or \I
#   operator was applied to
#
# Index alias
# \IA{foobar}{bazquux} [tidy...]
#   aliases one index tag (as might be supplied to \i or \I) to
#   another, so that \I{foobar} has the effect of \I{bazquux}, and
#   \i{foobar} has the effect of \I{bazquux}foobar.
#
#  If a "tidy" string is provided, it also performs
#   the function of \IR.
#
# Index copy
# \IC{foobar}{bazquux} [tidy...]
#   similar to \IA, but duplicates all the index entries from
#   "foobar" onto the index entry "bazquux", as if every \i{foobar}
#   or \I{foobar}, or its aliases defined by \IA, was immediately followed by
#   \I{bazquux}.
#
# Metadata/macros
# \M{key}{something}
#   defines document metadata, such as authorship, title and copyright;
#   different output formats use this differently.
# \m{key}
#   insert the {something} string associated with metadata {key}
#
# Include subfile
# \& filename
#   includes filename. Recursion is allowed. Must be on a separate line.
#

use File::Spec;

# Global configuration, filled in from the command line below.
@include_path = ();
$out_path = File::Spec->curdir();

my %srcfiles;			# For dependencies
my $depend_path;

# Parse leading options:
#   -d        write the index-diagnostic file (index.diag)
#   -M<file>  write a make-style dependency list to <file>
#   -I<dir>   (or -i) add <dir> to the include search path
#   -O<dir>   (or -o) write output files into <dir>
# The value must be attached directly to the option letter.  Any other
# argument starting with '-' is consumed and silently ignored.
while ($ARGV[0] =~ /^-/) {
    my $opt = shift @ARGV;
    if ($opt eq '-d') {
	$diag = 1;
    } elsif ($opt =~ /^\-M(.*)$/) {
	$depend_path = $1;
    } elsif ($opt =~ /^\-[Ii](.*)$/) {
	push(@include_path, $1);
    } elsif ($opt =~ /^\-[Oo](.*)$/) {
	$out_path = $1;
    }
}

# The first remaining argument is the output format; everything after it
# is an input file.  With no input files, read stdin ('-').
$out_format = shift(@ARGV);
@files = @ARGV;
@files = ('-') unless(scalar(@files));

# Autoflush the currently selected output handle.
$| = 1;

# Seed the document tree with a root node called "Top" at level 0.
$tstruct_previtem = $node = "Top";
# NOTE(review): this assigns the scalar $nodes, not the array @nodes that
# add_item(), indexdiag() and html_index() use -- possibly a typo.  Confirm
# before changing it: "Top" has no entry in %xrefnodes, which the HTML
# index code looks up for every element of @nodes.
$nodes = ($node);
$tstruct_level{$tstruct_previtem} = 0;
$tstruct_last[$tstruct_level{$tstruct_previtem}] = $tstruct_previtem;
$MAXLEVEL = 10;  # really 3, but play safe ;-)

# Read the file; pass a paragraph at a time to the paragraph processor.
$pname = [];
@pnames = @pflags = ();
$para = undef;
foreach $file (@files) {
  &include($file);
}
# Flush whatever paragraph was still being accumulated at end of input.
&got_para($para);
# NOTE: "my $outfile" is only declared further down, so the progress
# messages printed before that declaration interpolate the (unset)
# package variable $outfile and therefore start with an empty prefix.
print "$outfile: done.\n";

# Now we've read in the entire document and we know what all the
# heading keywords refer to. Go through and fix up the \k references.
print "$outfile: Fixing up cross-references...\n";
&fixup_xrefs;

# Sort the index tags, according to the slightly odd order I've decided on.
print "$outfile: sorting index tags...\n";
&indexsort;

# Make output directory if necessary
# (the result of mkdir is not checked here)
mkdir($out_path);

if ($diag) {
  print "$outfile: writing index-diagnostic file...\n";
  &indexdiag;
}

# OK. Write out the various output files.
# $outfile is the name used in progress messages and as the target of the
# dependency rule; each writer opens its own file(s) under $out_path.
my $outfile;
if ($out_format eq 'txt') {
    $outfile = 'nasmdoc.txt';
    print "$outfile: producing text output...\n";
    &write_txt;
} elsif ($out_format eq 'html') {
    $outfile = 'nasm00.html';
    print "$outfile: producing HTML output...\n";
    &write_html;
} elsif ($out_format eq 'dip') {
    $outfile = 'nasmdoc.dip';
    print "$outfile: producing Documentation Intermediate Paragraphs...\n";
    &write_dip;
} else {
    die "$0: unknown output format: $out_format\n";
}

if (defined($depend_path)) {
    # Write dependencies
    print "$outfile: writing dependencies\n";
    open(my $dep, '>', $depend_path)
	or die "$outfile: $depend_path: $!\n";

    # The rule target carries the output directory unless that is the
    # current directory.
    if ($out_path ne File::Spec->curdir()) {
	$outfile = File::Spec->catfile($out_path, $outfile);
    }

    # Emit "target : src src ..." and start a backslash-continued line
    # whenever adding the next source name would pass 77 columns.
    # $o is the line being built, $ol its running length.
    my $o = $outfile.' :';
    my $ol = length($o);
    foreach my $sf (sort(keys(%srcfiles))) {
	my $l = length($sf);
	if ($l + $ol > 77) {
	    print $dep $o, " \\\n";
	    $o = '';
	    $ol = 0;
	}
	$o .= ' '.$sf;
	$ol += $l+1;
    }
    print $dep $o, "\n\n";
    close($dep);
}

print "$outfile: done.\n";

# Append values to the array referenced by a scalar variable, creating
# the array first when the variable is still undefined.
#   first argument - scalar variable (taken by reference via the \$
#                    prototype) that holds, or will hold, an array ref
#   remaining      - values to append
# Returns the array reference now stored in that variable.
sub refpush(\$@) {
    my($slot, @values) = @_;
    if (!defined($$slot)) {
	$$slot = [];
    }
    push(@{$$slot}, @values);
    return $$slot;
}
# Expand an array reference into a list; an undefined reference yields
# the empty list instead of an error.
sub reflist($) {
    my($aref) = @_;
    return defined($aref) ? @$aref : ();
}

# Expand tab characters to spaces using 8-column tab stops.
# The column counter runs over the whole string; every non-tab character
# (including a newline) advances it by one.  A tab always produces at
# least one space.
sub untabify($) {
  my($text) = @_;
  my $expanded = '';
  my $column = 0;

  foreach my $ch (split(//, $text)) {
    if ($ch ne "\t") {
      $expanded .= $ch;
      $column++;
      next;
    }
    # Distance to the next multiple of 8 (a full 8 when already on one)
    my $pad = 8 - ($column % 8);
    $expanded .= ' ' x $pad;
    $column += $pad;
  }
  return $expanded;
}
# Handle one raw input line: expand tabs, then either follow an include
# directive ("\& filename") or hand the line to the paragraph collector.
# $_ is localized so the callee sees this line without disturbing the
# caller's $_.
sub read_line {
  my $line = &untabify(shift);
  local $_ = $line;
  if (!/\\& (\S+)/) {
    &get_para($_);
  } else {
    &include($1);
  }
}
# Accumulate the current input line (in $_, as set up by read_line) into
# the paragraph being collected in the global $para.
#
# A blank line, or a line starting with \IA, \IR, \IC or \M, ends the
# paragraph collected so far: it is passed to got_para() and $para is
# reset.  Non-blank lines have any "\#" source comment removed and are
# appended to $para with a leading space.
#
# NOTE(review): the "($_)" after the sub name is parsed as a prototype,
# not as a parameter list; the sub is only called as &get_para($_),
# which bypasses prototypes, and the body works on the global $_.
sub get_para($_) {
  chomp;
  if (!/\S/ || /^\\(I[ARC]|M)/) { # special case: \I[ARC] \M imply new-paragraph
    &got_para($para);
    $para = undef;
  }
  if (/\S/) {
    # Strip comments.  A "\#" preceded by a backslash is left alone; the
    # character in front of the comment marker is kept.
    s/(^|[^\\])\\#.*$/$1/;
    $para .= " " . $_;
  }
}
# Read one source file and feed it, line by line, to read_line().
#   $name - file name, or '-' for standard input
# Named files are looked up in the current directory first and then in
# each -I directory, in order.  The name as given (without the search
# directory) is recorded in %srcfiles for the dependency output.
# Dies if the file cannot be opened from any of those directories.
sub include {
  my($name) = @_;
  my $fh;

  if ($name ne '-') {
    my $opened = 0;
    foreach my $dir (File::Spec->curdir, @include_path) {
      my $candidate = File::Spec->catfile($dir, $name);
      next unless open($fh, '<', $candidate);

      # Assume that make uses VPATH for the input search path,
      # and so dependencies should not include the search directory.
      $srcfiles{$name}++;
      $opened = 1;
      print "$outfile: reading $candidate...\n";
      last;
    }
    die "$0:$outfile: Cannot open $name: $!\n" unless ($opened);
  } else {
    open($fh, '<&', \*STDIN);		# stdin
    print "$outfile: reading stdin...\n";
  }

  while (defined($_ = <$fh>)) {
    &read_line($_);
  }
  close($fh);
}
# Process one complete source paragraph.
#
#   $_[0] - the paragraph text (lines joined with spaces), or undef
#
# The paragraph is classified by its leading directive (code, chapter,
# appendix, heading, subheading, index rewrite/alias/copy, metadata,
# bullet/indent/blockquote, or normal), then split into typed words
# which are pushed onto a fresh array.  The result is either registered
# as an index rewrite via addidx(), or appended to the global @pnames
# (word arrays) and @pflags (paragraph type strings).
#
# Side effects: updates the section counters ($cnum, $hnum, $snum), the
# current $node, the cross-reference tables (%refs, %xrefs, %xrefnodes,
# %nodexrefs), the document tree (via add_item), %metadata, %idxalias
# and %idxcopy.  Dies on malformed directives.
sub got_para {
  local ($_) = @_;
  # All of these must be lexical; "my $a = ..., $b" would declare only $a.
  my ($pflags, $i, $w, $l, $t);
  my $para = [];

  $pflags = "";

  return if !/\S/;

  # Replace metadata macros
  while (/^(.*)\\m\{([^\}]*)\}(.*)$/) {
      if (defined($metadata{$2})) {
          $_ = $1.$metadata{$2}.$3;
      } else {
          $_ = $1.$2.$3;
      }
  }

  # Strip off _leading_ spaces, then determine type of paragraph.
  s/^\s*//;
  $irewrite = undef;
  if (/^\\c[^{]/) {
    # A code paragraph. The paragraph-array will contain the simple
    # strings which form each line of the paragraph.
    $pflags = "code";
    while (/^\\c (([^\\]|\\[^c])*)(.*)$/) {
      $l = $1;
      $_ = $3;
      $l =~ s/\\\{/\{/g;
      $l =~ s/\\\}/}/g;
      $l =~ s/\\\\/\\/g;
      push @$para, $l;
    }
    $_ = ''; # suppress word-by-word code
  } elsif (/^\\C/) {
    # A chapter heading. Define the keyword and allocate a chapter
    # number.
    $cnum++;
    $hnum = 0;
    $snum = 0;
    $xref = "chapter-$cnum";
    $pflags = "chap $cnum :$xref";
    die "$outfile: badly formatted chapter heading: $_\n" if !/^\\C\{([^\}]*)\}\s*(.*)$/;
    $refs{$1} = "chapter $cnum";
    $node = "Chapter $cnum";
    &add_item($node, 1, $para);
    $xrefnodes{$node} = $xref; $nodexrefs{$xref} = $node;
    $xrefs{$1} = $xref;
    $_ = $2;
    # the standard word-by-word code will happen next
  } elsif (/^\\A/) {
    # An appendix heading. Define the keyword and allocate an appendix
    # letter.  The first appendix switches the counter from digits to
    # 'A'; later ones rely on Perl's string increment.
    $cnum++;
    $cnum = 'A' if $cnum =~ /[0-9]+/;
    $hnum = 0;
    $snum = 0;
    $xref = "appendix-$cnum";
    $pflags = "appn $cnum :$xref";
    die "$outfile: badly formatted appendix heading: $_\n" if !/^\\A\{([^\}]*)}\s*(.*)$/;
    $refs{$1} = "appendix $cnum";
    $node = "Appendix $cnum";
    &add_item($node, 1, $para);
    $xrefnodes{$node} = $xref; $nodexrefs{$xref} = $node;
    $xrefs{$1} = $xref;
    $_ = $2;
    # the standard word-by-word code will happen next
  } elsif (/^\\H/) {
    # A major heading. Define the keyword and allocate a section number.
    $hnum++;
    $snum = 0;
    $xref = "section-$cnum.$hnum";
    $pflags = "head $cnum.$hnum :$xref";
    die "$outfile: badly formatted heading: $_\n" if !/^\\[HP]\{([^\}]*)\}\s*(.*)$/;
    $refs{$1} = "section $cnum.$hnum";
    $node = "Section $cnum.$hnum";
    &add_item($node, 2, $para);
    $xrefnodes{$node} = $xref; $nodexrefs{$xref} = $node;
    $xrefs{$1} = $xref;
    $_ = $2;
    # the standard word-by-word code will happen next
  } elsif (/^\\S/) {
    # A sub-heading. Define the keyword and allocate a section number.
    $snum++;
    $xref = "section-$cnum.$hnum.$snum";
    $pflags = "subh $cnum.$hnum.$snum :$xref";
    die "$outfile: badly formatted subheading: $_\n" if !/^\\S\{([^\}]*)\}\s*(.*)$/;
    $refs{$1} = "section $cnum.$hnum.$snum";
    $node = "Section $cnum.$hnum.$snum";
    &add_item($node, 3, $para);
    $xrefnodes{$node} = $xref; $nodexrefs{$xref} = $node;
    $xrefs{$1} = $xref;
    $_ = $2;
    # the standard word-by-word code will happen next
  } elsif (/^\\IR/) {
    # An index-rewrite.
    die "$outfile: badly formatted index rewrite: $_\n" if !/^\\IR\{([^\}]*)\}\s*(.+?)\s*$/;
    $irewrite = $1;
    $_ = $2;
    # the standard word-by-word code will happen next
  } elsif (/^\\I([AC])/) {
    # An index alias or copy
    my $what = $1 eq 'C' ? 'copy' : 'alias';
    die "$outfile: badly formatted index $what: $_\n"
      if !/^\\I[AC]\{([^\}]*)}\{([^\}]*)\}\s*(.*?)\s*$/;
    my $from = $1;
    my $to   = $2;
    my $tidy = $3;
    if ($what eq 'copy') {
      refpush($idxcopy{$from}, $to);
    } else {
      $idxalias{$from} = $to;
    }
    return if ($tidy eq '');    # No rewrite, skip word by word code

    $irewrite = $to;
    $_ = $tidy;
  } elsif (/^\\M/) {
    # Metadata
    die "$outfile: badly formed metadata: $_\n" if !/^\\M\{([^\}]*)}\{([^\}]*)\}\s*$/;
    $metadata{$1} = $2;
    return; # avoid word-by-word code
  } elsif (/^\\([b\>q])/) {
    # An indented paragraph of some sort. Strip off the initial \b and let the
    # word-by-word code take care of the rest.
    my %ipar = (
      'b' => 'bull',
      '>' => 'indt',
      'q' => 'bquo',
    );
    $pflags = $ipar{$1};
    s/^\\[b\>q]\s*//;
  } else {
    # A normal paragraph. Just set $pflags: the word-by-word code does
    # the rest.
    $pflags = "norm";
  }

  # The word-by-word code: unless @$para is already defined (which it
  # will be in the case of a code paragraph), split the paragraph up
  # into words and push each on @$para.
  #
  # Each thing pushed on @$para should have a two-character type
  # code followed by the text.
  #
  # Type codes are:
  # "n " for normal
  # "da" for an en dash
  # "dm" for an em dash
  # "es" for first emphasised word in emphasised bit
  # "e " for emphasised in mid-emphasised-bit
  # "ee" for last emphasised word in emphasised bit
  # "eo" for single (only) emphasised word
  # "c " for code
  # "k " for cross-ref
  # "kK" for capitalised cross-ref
  # "w " for Web link
  # "wc" for code-type Web link
  # "x " for beginning of resolved cross-ref; generates no visible output,
  #      and the text is the cross-reference code
  # "xe" for end of resolved cross-ref; text is same as for "x ".
  # "i " for point to be indexed: the text is the internal index into the
  #      index-items arrays
  # "sp" for space
  while (/\S/) {
    s/^\s*//, push @$para, "sp" if /^\s/;
    $indexing = $qindex = 0;
    if (/^(\\[iI])?\\c/) {
      # Inline code, optionally indexed (\i\c) or index-only (\I\c)
      $qindex = 1 if $1 eq "\\I";
      $indexing = 1, s/^\\[iI]// if $1;
      s/^\\c//;
      die "$outfile: badly formatted \\c: \\c$_\n" if !/\{(([^\\}]|\\.)*)\}(.*)$/;
      $w = $1;
      $_ = $3;
      $w =~ s/\\\{/\{/g;
      $w =~ s/\\\}/\}/g;
      $w =~ s/\\-/-/g;
      $w =~ s/\\\\/\\/g;
      push(@$para, addidx($node, $w, "c $w")) if ($indexing);
      push(@$para, "c $w") if (!$qindex);
    } elsif (/^\\[iIe]/) {
      # Indexed and/or emphasised text: \i{..}, \I{..}, \e{..}, \i\e{..}
      /^(\\[iI])?(\\e)?/;
      $emph = 0;
      $qindex = 1 if $1 eq "\\I";
      $indexing = 1, $type = "\\i" if $1;
      $emph = 1, $type = "\\e" if $2;
      # Remove exactly the prefixes recognised above.  (The "?" belongs
      # outside the \e group; inside it, a lone backslash would match.)
      s/^(\\[iI])?(\\e)?//;
      die "$outfile: badly formatted $type: $type$_\n" if !/\{(([^\\}]|\\.)*)\}(.*)$/;
      $w = $1;
      $_ = $3;
      $w =~ s/\\\{/\{/g;
      $w =~ s/\\\}/\}/g;
      $w =~ s/\\-/-/g;
      $w =~ s/\\\\/\\/g;
      $t = $emph ? "es" : "n ";
      @ientry = ();
      @pentry = ();
      foreach $i (split /\s+/,$w) {  # \e and \i can be multiple words
        push @pentry, "$t$i","sp";
        # Index entries are stored lower-cased and never emphasised
        (my $ii = $i) =~ tr/A-Z/a-z/;
        push @ientry, "n $ii", "sp";
        $t = $emph ? "e " : "n ";
      }
      if ($indexing) {
        $w =~ tr/A-Z/a-z/;
        pop @ientry;            # remove final space
        push(@$para, addidx($node, $w, @ientry));
      }
      if (!$qindex) {
        pop @pentry;            # remove final space
        # Mark the last emphasised word: "eo" if it is also the first
        if (substr($pentry[-1],0,2) eq 'es') {
          substr($pentry[-1],0,2) = 'eo';
        } elsif ($emph) {
          substr($pentry[-1],0,2) = 'ee';
        }
        push(@$para, @pentry);
      }
    } elsif (/^\\[kK]/) {
      # Cross-reference keyword; resolved later by fixup_xrefs
      $t = "k ";
      $t = "kK" if /^\\K/;
      s/^\\[kK]//;
      die "$outfile: badly formatted \\k: \\k$_\n" if !/\{([^\}]*)\}(.*)$/;
      $_ = $2;
      push @$para,"$t$1";
    } elsif (/^\\[Ww]/) {
      # Web link.  $l is the link target, $w the displayed text, $c is
      # true when the text is shown in code style.
      if (/^\\w/) {
        die "$outfile: badly formatted \\w: $_\n"
          if !/^\\w(\\i)?\{([^\\}]*)\}(.*)$/;
        $l = $2;
        $w = $2;
        $indexing = $1;
        $c = 1;
        $_ = $3;
      } else {
        die "$outfile: badly formatted \\W: $_\n"
          if !/^\\W\{([^\\}]*)\}(\\i)?(\\c)?\{(([^\\}]|\\.)*)\}(.*)$/;
        $l = $1;
        $w = $4;
        $_ = $6;
        $indexing = $2;
        $c = $3;
      }
      $t = $c ? 'wc' : 'w ';
      $w =~ s/\\\{/\{/g;
      $w =~ s/\\\}/\}/g;
      $w =~ s/\\-/-/g;
      $w =~ s/\\\\/\\/g;
      push(@$para, addidx($node, $w, "c $w")) if $indexing;
      push(@$para, "$t<$l>$w");
    } else {
      # An ordinary word: runs up to whitespace, a backslash directive,
      # or just past an unescaped hyphen.
      die "$outfile: what the hell? $_\n" if !/^(([^\s\\\-]|\\[\\{}\-])*-?)(.*)$/;
      die "$outfile: painful death! $_\n" if !length $1;
      $w = $1;
      $_ = $3;
      $w =~ s/\\\{/\{/g;
      $w =~ s/\\\}/\}/g;
      $w =~ s/\\-/-/g;
      $w =~ s/\\\\/\\/g;
      if ($w eq '--') {
        push @$para, 'dm';
      } elsif ($w eq '-') {
        push @$para, 'da';
      } else {
        push @$para,"n $w";
      }
    }
  }

  # A non-empty rewrite tag means this paragraph is the display text for
  # an index entry rather than document content.
  if (defined($irewrite) && $irewrite ne '') {
    addidx(undef, $irewrite, @$para);
  } else {
    push @pnames, $para;
    push @pflags, $pflags;
  }
}

# Map an index tag through the alias table built from \IA directives.
# Returns the alias target if one is defined, otherwise the tag itself.
sub indexalias($) {
    my($tag) = @_;
    my $target = $idxalias{$tag};
    return $tag unless defined($target);
    return $target;
}

# Register an index entry and/or an index reference.
#
#   $node   - node (chapter/section name) the reference occurs in, or
#             undef when this call only defines display text for a tag
#             (index rewrite)
#   $text   - the index tag
#   @ientry - typed words to display for the tag in the index
#
# The tag itself and every tag it is copied to (\IC) are processed, each
# after alias resolution (\IA).  A tag seen for the first time gets
# @ientry as its display text.  A rewrite (undefined $node) of an
# already-known tag adds a numbered duplicate entry, so one tag can
# appear in the index under several display texts.
#
# Returns the list of "i <tag>" words to embed in the paragraph; empty
# when $node is undefined.
sub addidx($$@) {
  my($node, $text, @ientry) = @_;

  my @out;
  foreach my $tag ($text, reflist($idxcopy{$text})) {
      my $t = indexalias($tag);
      if (!exists($idxmap{$t})) {
          $idxmap{$t} = [@ientry];
          $idxdup{$t} = [$t];
      } elsif (!defined($node)) {
          # Synthesize a unique key for the additional display text and
          # chain it to the original tag.
          my $dupentry = sprintf('%s    #%05d', $t, $#{$idxdup{$t}} + 2);
          $idxmap{$dupentry} = [@ientry];
          refpush($idxdup{$t}, $dupentry);
      }

      if (defined($node)) {
          # Reference every display variant of the tag from this node
          push(@out, map { $idxnodes{$node,$_} = 1; "i $_" } @{$idxdup{$t}});
      }
  }

  return @out;
}

# Sort the index tags (keys of %idxmap) into the global @itags, then
# detect entries that share their text up to a comma.
#
# Sort keys, compared in order until one differs:
#   1. a "classed" form of the entry text in which every character gets a
#      class prefix (D for digits of zero-padded numbers, Z for letters,
#      A for anything else), so that numbers sort numerically and ahead
#      of punctuation and letters;
#   2. the lower-cased entry text with brackets and some commas removed;
#   3. the tag itself;
#   4. the sequence of word type codes.
#
# The second pass splits the first word ending in a comma, records the
# split position in %commapos, and sets %commanext / %commaafter for
# consecutive entries whose text up to the comma is identical.
sub indexsort {
  # All of these must be lexical; "my $a, $b" would declare only $a.
  my ($iitem, $ientry, $i, $piitem, $pcval, $cval, $clrcval);

  @itags = map { # get back the original data as the 1st elt of each list
      $_->[0]
  } sort { # compare auxiliary (non-first) elements of lists
      my $d = 0;
      for (my $i = 1; defined($a->[$i]) || defined($b->[$i]); $i++) {
          $d = $a->[$i] cmp $b->[$i];
          last if ($d);
      }
      $d
  } map { # transform each tag into [tag, classed key, plain key, tag, types]
      my $ientry = $idxmap{$_};
      my $b = lc(join(' ', map { substr($_,2) } @$ientry));
      $b =~ s/([][(){}]+|\B,)//g;
      $b =~ s/\s+/ /g;
      my $a = $b;
      $a =~ s/([[:alpha:]])/Z$1/g;
      # From this point on [A-Z] means an already classed character
      # Try to sort numbers in numerical order (e.g. 8 before 16)
      while ($a =~ /^(|.*?[^A-Z])(\d+)(\.\d+)?(.*)$/) {
          my $p = $1; my $s = $4;
          my $nn = ('0' x (24 - length($2))) . $2 . $3;
          $nn =~ s/(.)/D$1/g;
          $a = $p . $nn . $s;
      }
      $a =~ s/([^A-Z\s])/A$1/g;
      my $c = join(' ', map { substr($_,0,2) } @$ientry);
      my $v = [$_, $a, $b, $_, $c];
      $v
  } keys %idxmap;

  # Having done that, check for comma-hood.
  $cval = 0;
  foreach $iitem (@itags) {
    $ientry = $idxmap{$iitem};
    $clrcval = 1;
    $pcval = $cval;
    FL:for ($i=0; $i <= $#$ientry; $i++) {
      if ($$ientry[$i] =~ /^(n .*,)(.*)/) {
        # Split "word,rest" so the comma ends its own word
        $$ientry[$i] = $1;
        splice @$ientry,$i+1,0,"n $2" if length $2;
        $commapos{$iitem} = $i+1;
        $cval = join("\002", @$ientry[0..$i]);
        $clrcval = 0;
        last FL;
      }
    }
    $cval = undef if $clrcval;
    $commanext{$iitem} = $commaafter{$piitem} = 1
      if $cval and ($cval eq $pcval);
    $piitem = $iitem;
  }
}

# Write the index diagnostic file "index.diag" into the output directory.
# Each line has the form
#   <tag> display text: node, node, ...
# listing every node in @nodes that references the tag.
# Dies if the file cannot be created.
sub indexdiag {
  my ($iitem, $ientry, $w, $ww, $foo);
  my $diagpath = File::Spec->catfile($out_path, 'index.diag');

  open(INDEXDIAG, '>', $diagpath)
    or die "$0: $diagpath: $!\n";
  foreach $iitem (@itags) {
    $ientry = $idxmap{$iitem};
    print INDEXDIAG "<$iitem> ";
    foreach $w (@$ientry) {
      $ww = &word_txt($w);
      # "\001" is word_txt's marker for words with no visible output
      print INDEXDIAG $ww unless $ww eq "\001";
    }
    print INDEXDIAG ":";
    $foo = " ";                 # separator: space first, then ", "
    foreach my $node (@nodes) {
      if ($idxnodes{$node,$iitem}) {
        print INDEXDIAG $foo, $node;
        $foo = ", ";
      }
    }
    print INDEXDIAG "\n";
  }
  close INDEXDIAG;
}

# Resolve cross-reference words in every non-code paragraph.
#
# Each "k <key>" or "kK<key>" word in @pnames is replaced in place by
#   "x <xref>", the words of the reference text from %refs separated by
#   "sp", and "xe<xref>"
# where <xref> comes from %xrefs.  For "kK" the first letter of the
# reference text is capitalised.  Dies on a keyword with no entry in
# %refs.
sub fixup_xrefs {
  # All of these must be lexical; "my $a, $b" would declare only $a.
  my ($pname, $p, $i, $k, $caps, $repl, @repl);

  for ($p = 0; $p <= $#pnames; $p++) {
    next if $pflags[$p] eq "code";
    $pname = $pnames[$p];
    # Walk backwards so splicing does not disturb unvisited indices
    for ($i = $#$pname; $i >= 0; $i--) {
      $k = $$pname[$i];
      next unless $k =~ /^k/;

      $caps = ($k =~ /^kK/);
      $k = substr($k,2);
      $repl = $refs{$k};
      die "$outfile: undefined keyword `$k'\n" unless $repl;
      substr($repl,0,1) =~ tr/a-z/A-Z/ if $caps;
      @repl = ();
      push @repl,"x $xrefs{$k}";
      foreach my $j (split /\s+/,$repl) {
        push @repl,"n $j";
        push @repl,"sp";
      }
      pop @repl; # remove final space
      push @repl,"xe$xrefs{$k}";
      splice @$pname,$i,1,@repl;
    }
  }
}

# Write the plain-text rendering of the document to "nasmdoc.txt" in the
# output directory.
#
# Headings are rendered with their numbers (chapters and appendices are
# underlined with dashes), code paragraphs are indented by seven spaces,
# and ordinary paragraphs are word-wrapped at a 75-column right margin.
# Output goes through the selected handle, which is restored to STDOUT
# at the end.  Dies if the file cannot be created.
sub write_txt {
  # This is called from the top level, so I won't bother using
  # my or local.

  # Open file.
  print "writing file...";
  my $txtpath = File::Spec->catfile($out_path, 'nasmdoc.txt');
  open(TEXT, '>', $txtpath)
    or die "$0: $txtpath: $!\n";
  select TEXT;

  # Preamble: centred title underlined with '='.
  $title = $metadata{'title'};
  $spaces = ' ' x ((75-(length $title))/2);
  ($underscore = $title) =~ s/./=/g;
  print "$spaces$title\n$spaces$underscore\n";

  for ($para = 0; $para <= $#pnames; $para++) {
    $pname = $pnames[$para];
    $pflags = $pflags[$para];
    $ptype = substr($pflags,0,4);

    print "\n"; # always one of these before a new paragraph

    if ($ptype eq "chap") {
      # Chapter heading. "Chapter N: Title" followed by a line of
      # minus signs.
      $pflags =~ /chap (.*) :(.*)/;
      $title = "Chapter $1: ";
      foreach $i (@$pname) {
        $ww = &word_txt($i);
        $title .= $ww unless $ww eq "\001";
      }
      print "$title\n";
      $title =~ s/./-/g;
      print "$title\n";
    } elsif ($ptype eq "appn") {
      # Appendix heading. "Appendix N: Title" followed by a line of
      # minus signs.
      $pflags =~ /appn (.*) :(.*)/;
      $title = "Appendix $1: ";
      foreach $i (@$pname) {
        $ww = &word_txt($i);
        $title .= $ww unless $ww eq "\001";
      }
      print "$title\n";
      $title =~ s/./-/g;
      print "$title\n";
    } elsif ($ptype eq "head" || $ptype eq "subh") {
      # Heading or subheading. Just a number and some text.
      $pflags =~ /.... (.*) :(.*)/;
      $title = sprintf "%6s ", $1;
      foreach $i (@$pname) {
        $ww = &word_txt($i);
        $title .= $ww unless $ww eq "\001";
      }
      print "$title\n";
    } elsif ($ptype eq "code") {
      # Code paragraph. Emit each line with a seven character indent.
      my $maxlen = 80;
      foreach $i (@$pname) {
        warn "code line longer than $maxlen chars: $i\n"
          if ( length($i) > $maxlen );
        print ' 'x7, $i, "\n";
      }
    } elsif ($ptype =~ /^(norm|bull|indt|bquo)$/) {
      # Ordinary paragraph, optionally indented. We wrap, with ragged
      # 75-char right margin and either 7 or 11 char left margin
      # depending on bullets.
      if ($ptype ne 'norm') {
        $line = ' 'x7 . (($ptype eq 'bull') ? '(*) ' : '    ');
        $next = ' 'x11;
      } else {
        $line = $next = ' 'x7;
      }
      @a = @$pname;
      $wd = $wprev = '';
      # $wd collects the words of one unbreakable unit; a unit ends at a
      # space, after a word ending in '-', or at the end of the paragraph.
      do {
        do { $w = &word_txt(shift @a) } while $w eq "\001"; # nasty hack
        $wd .= $wprev;
        if ($wprev =~ /-$/ || !defined($w) || $w eq ' ' || $w eq '') {
          if (length ($line . $wd) > 75) {
            $line =~ s/\s*$//; # trim trailing spaces
            print "$line\n";
            $line = $next;
            $wd =~ s/^\s*//; # trim leading spaces
          }
          $line .= $wd;
          $wd = '';
        }
        $wprev = $w;
      } while (defined($w) && $w ne '');
      if ($line =~ /\S/) {
        $line =~ s/\s*$//; # trim trailing spaces
        print "$line\n";
      }
    }
  }

  # Close file.
  select STDOUT;
  close TEXT;
}

# Render one typed word (two-character type code followed by text) as
# plain text.
#
# Returns:
#   undef   for an empty or undefined word (end-of-paragraph marker)
#   "\001"  for words with no visible output (cross-reference markers
#           and index points)
#   the rendered text otherwise: inline code is quoted as `text', and
#   emphasis is marked with underscores; both dash types become '-'.
# Dies on an unknown type code.
sub word_txt {
  my ($w) = @_;
  # Both must be lexical; "my $wtype, $wmajt" would declare only $wtype.
  my ($wtype, $wmajt);

  return undef if !defined($w) || $w eq '';
  $wtype = substr($w,0,2);
  $wmajt = substr($wtype,0,1);
  $w = substr($w,2);
  $w =~ s/<.*>// if $wmajt eq "w"; # remove web links
  if ($wmajt eq "n" || $wtype eq "e " || $wtype eq "w ") {
    return $w;
  } elsif ($wtype eq "sp") {
    return ' ';
  } elsif ($wtype eq 'da' || $wtype eq 'dm') {
    return '-';
  } elsif ($wmajt eq "c" || $wtype eq "wc") {
    return "`${w}'";
  } elsif ($wtype eq "es") {
    return "_${w}";
  } elsif ($wtype eq "ee") {
    return "${w}_";
  } elsif ($wtype eq "eo") {
    return "_${w}_";
  } elsif ($wmajt eq "x" || $wmajt eq "i") {
    return "\001";
  } else {
    die "$outfile: panic in word_txt: $wtype$w\n";
  }
}

# Map a node name to the name of the HTML file that contains it.
#   'Contents' -> nasm00.html, 'Index' -> nasmix.html (case-insensitive)
#   'Chapter N' / 'chapter-N' -> nasmNN.html (two digits)
#   anything else is treated as an appendix -> nasma<id>.html, with the
#   identifier lower-cased
sub html_filename($) {
    my($name) = @_;
    my $key = lc($name);

    return 'nasm00.html' if ($key eq 'contents');
    return 'nasmix.html' if ($key eq 'index');

    # Split "<type> <number>" or "<type>-<number>"
    $key =~ /^(\w+)(?:[ -](\w+))?/;
    my($kind, $ident) = ($1, $2);

    return ($kind eq 'chapter')
	? sprintf('nasm%02d.html', $ident)
	: "nasma${ident}.html";	# Appendix
}

# Handle of the HTML file currently being written; undefined when no
# file is open.
my $html_fh;

# Create the HTML file for $node in the output directory and make it the
# selected output handle, so plain print statements write into it.
# Dies if a file is already open or the new file cannot be created.
# Returns the file name (without the output directory).
sub html_openfile($) {
    my($node) = @_;
    defined($html_fh) and die;
    my $filename = html_filename($node);
    my $pathname = File::Spec->catfile($out_path, $filename);
    unless (open($html_fh, '>', $pathname)) {
	die "$0: $pathname: $!\n";
    }
    select $html_fh;
    return $filename;
}
# Close the current HTML file, if any: switch the selected output handle
# back to STDOUT, close the file and forget its handle.
sub html_closefile() {
    if (defined $html_fh) {
	select STDOUT;
	close $html_fh;
	undef $html_fh;
    }
}

# Navigation bar entries, one [label, file name, CSS class suffix]
# triple per entry; filled in by html_navbar_generate().
my @html_navbar;

# Build @html_navbar: a "Contents" entry, one entry per level-1 node
# (chapter or appendix) in document order, and an "Index" entry.
# The first entry of each kind is labelled "<Kind>&ensp;<number>"; the
# following ones of the same kind show only the number.
sub html_navbar_generate() {
    @html_navbar = (['Contents', html_filename('contents'), 'toc']);
    my $ctype;
    # Use a private loop variable so that walking the tree does not
    # overwrite the global $node.
    for (my $node = $tstruct_next{'Top'}; $node; $node = $tstruct_next{$node}) {
	my $plevel = $tstruct_level{$node};

	next unless ($plevel == 1);
	next unless ($node =~ /^(\w+) ([\w\.]+)?/);
	my $nctype = $1;
	my $nname  = $2;

	if ($nctype ne $ctype) {
	    $nname = "$nctype\&ensp;$nname";
	    $ctype = $nctype;
	}
	push(@html_navbar, [$nname, html_filename($node), lc($ctype)]);
    }
    push(@html_navbar, ['Index', html_filename('Index'), 'index']);
}

# Open the HTML file for $node and write everything that comes before
# the page body: document head, navigation bar, title block, and the
# opening tag of the content <div>.  The content <div> gets the class
# "contents <type>", where <type> is the lower-cased first word of the
# node name ("toc" for the contents page).
sub html_preamble($) {
    my($node) = @_;
    my $filename = html_openfile($node);

    $node =~ /^(\w+)(?:[ -](\w+))?/;
    my $nodetype = lc($1);
    if ($nodetype eq 'contents') {
	$nodetype = 'toc';
    }

    # Document head
    print "<!DOCTYPE html>\n",
	"<head>\n",
	"<meta charset=\"UTF-8\" />\n",
	"<title>", $metadata{'title'}, "</title>\n",
	"<link href=\"nasmdoc.css\" rel=\"stylesheet\" type=\"text/css\" />\n",
	"<link href=\"local.css\" rel=\"stylesheet\" type=\"text/css\" />\n",
	"</head>\n",
	"<body>\n";

    # Navigation bar
    print "<div class=\"header\">\n";
    # Stray whitespace inside the <nav> tag causes problems; handle by
    # putting line breaks inside HTML comments. HACK!
    print "<nav class=\"navbar\" role=\"navigation\"><ul><!--\n";
    foreach my $entry (@html_navbar) {
	my($label, $target, $class) = @$entry;
	# Highlight the entry that points at the file being written
	my $current = ($target eq $filename) ? ' navthis' : '';
	printf "--><li class=\"nav%s%s\"><a href=\"%s\">%s</a></li><!--\n",
	    $class, $current, $target, $label;
    }
    print "--></ul></nav>\n";

    # Title block, then open the content area
    print "<div class=\"title\">\n",
	"<h1>", $metadata{'title'}, "</h1>\n",
	'<h2>', $metadata{'subtitle'}, "</h2>\n",
	"</div>\n",
	"</div>\n";
    print "<div class=\"contents $nodetype\">\n";
}

# Write the common postamble code and close the current HTML file.
# Does nothing when no HTML file is open.
sub html_postamble {
    defined($html_fh) or return;
    print "</div>\n</body>\n</html>\n";
    html_closefile();
}

# Write the complete HTML rendering of the document: a table-of-contents
# file, one file per chapter/appendix, and an index file.
#
# Called from the top level; works on globals throughout ($node, $level,
# $ollevel, $para, $pname, $pflags, $in_list, ...).  All page content is
# written with plain print to whichever file html_preamble() selected.
sub write_html {
    # Create the navbar list
    html_navbar_generate();

    # Write contents file. Just the preamble, then a menu of links to the
  # separate chapter files and the nodes therein.
  print "writing contents file...";
  html_preamble('contents');
  print "<h2>Table of Contents</h2>\n";
  # $level is the current list nesting depth; $ollevel is the depth of
  # the innermost <ol> that is still open.
  $level = 0;
  $ollevel = 0;

  # Close list items (and their enclosing <ol> where one is open at that
  # depth) until the nesting depth is back down to $plevel.
  # Uses the globals $level and $ollevel set up above.
  sub toc_close_tags($) {
      my($plevel) = @_;
      while ($plevel < $level) {
	  print "</li>\n";
	  if ($level-- <= $ollevel) {
	      print "</ol>\n";
	      $ollevel--;
	  }
      }
  }

  undef $ctype;			# Chapter or Appendix
  # Walk the document tree in reading order, one TOC entry per node.
  for ($node = $tstruct_next{'Top'}; $node; $node = $tstruct_next{$node}) {
      my $plevel = $tstruct_level{$node};
      my @pnn = split(/[ \.]/, $node);
      # Node name without its leading type word, e.g. "1.2"
      (my $nname = $node) =~ s/^.*?\s+//;
      # Numeric list value: the last number component, or for a letter
      # its position in the alphabet (A = 1)
      my $nnum = $pnn[-1] + 0 || ord($pnn[-1]) - ord('A') + 1;
      my $nctype = lc($pnn[0]);

      toc_close_tags($plevel);
      # Start a new group heading when switching between chapters and
      # appendices at the top level
      if ($plevel < 2 && $nctype ne $ctype) {
	  toc_close_tags(0);
	  my $plural = $nctype;
	  $plural =~ s/^(.)/\U$1/;
	  $plural =~ s/ix$/ice/; # ix -> ice + s -> ices
	  $plural .= 's';
	  print "<h3 class=\"tocheading $nctype\">$plural</h3>\n";
	  $ctype = $nctype;
      }

      while ($plevel > $level) {
	  $level++;
	  my $cclass = ($level == 1) ? " $ctype" : '';
	  print "<ol class=\"toc${level}${cclass}\"", ">\n";
	  $ollevel = $level;
      }

      if ($level == 1) {
	  $link = $fname = html_filename($node);
      } else {
	  # Use the preceding filename plus a marker point.
	  $link = $fname . "#$xrefnodes{$node}";
      }

      my $pname = $tstruct_pname{$node};
      my $title = plist_to_html(@$pname);
      printf "<li value=\"%d\" data-name=\"%s\">\n", $nnum, $nname;
      # The $node span is obsolete and is only included for now to avoid
      # breaking any existing local.css files.
      printf "<a href=\"%s\"><span class=\"node\">%s: </span>%s</a>\n",
	  $link, $node, $title;
  }
  toc_close_tags(0);
  html_postamble();

  # Open a null file, to ensure output (eg random &html_jumppoints calls)
  # goes _somewhere_.
  # NOTE: the null-file code below is commented out, so anything printed
  # before the first chapter heading opens a file goes to the handle
  # that html_postamble() left selected.
  print "writing chapter files...";
  # open TEXT, '>', File::Spec->devnull();
  # select TEXT;

  # Flags tracking which multi-paragraph HTML constructs are open
  $in_list = 0;
  $in_bquo = 0;
  $in_code = 0;

  for ($para = 0; $para <= $#pnames; $para++) {
    $pname = $pnames[$para];
    $pflags = $pflags[$para];
    $ptype = substr($pflags,0,4);

    # Close constructs that the new paragraph type does not continue.
    # A code paragraph may sit inside a list without ending it.
    $in_code = 0, print "</pre>\n" if ($in_code && $ptype ne 'code');
    $in_list = 0, print "</li>\n</ul>\n" if ($in_list && $ptype !~ /^(bull|indt|code)$/);
    $in_bquo = 0, print "</blockquote>\n" if ($in_bquo && $ptype ne 'bquo');

    $endtag = '';

    if ($ptype eq 'chap' || $ptype eq 'appn') {
      # Chapter/appendix heading. Begin a new file.
      $pflags =~ /^\w+ (.*) :(.*)/;
      $xref = $2;
      # Build "Chapter N:" / "Appendix X:" from the xref name by dropping
      # its "-N" suffix and capitalising the first letter
      $title = "$2 $1:&ensp;";
      $title =~ s/-\S+//;
      $title =~ s/^(.)/\U$1/;
      html_postamble();
      # $chapternode lets word_html() emit same-file links without a
      # file name
      $chapternode = $nodexrefs{$xref};
      html_preamble($chapternode);
      foreach $i (@$pname) {
	$ww = &word_html($i);
	$title .= $ww unless $ww eq "\001";
      }
      $h = "<h2 id=\"$xref\">$title</h2>\n";
      print $h;
    } elsif ($ptype eq "head" || $ptype eq "subh") {
      # Heading or subheading.
      $pflags =~ /.... (.*) :(.*)/;
      $hdr = ($ptype eq "subh" ? "h4" : "h3");
      $title = $1 . ".&ensp;";
      $xref = $2;
      foreach $i (@$pname) {
        $ww = &word_html($i);
        $title .= $ww unless $ww eq "\001";
      }
      print "<$hdr id=\"$xref\">$title</$hdr>\n";
    } elsif ($ptype eq "code") {
	# Code paragraph.
	# Consecutive code paragraphs share one <pre>, separated by a
	# blank line.
	$in_code = 1, print "<pre>" unless $in_code;
	print "\n";
	foreach $i (@$pname) {
	    $w = $i;
	    $w =~ s/&/&amp;/g;
	    $w =~ s/</&lt;/g;
	    $w =~ s/>/&gt;/g;
	    print $w, "\n";
	}
    } elsif ($ptype =~ /^(norm|bull|indt|bquo)$/) {
      # Ordinary paragraph, optionally indented.
	if ($ptype eq 'bull') {
	    # Each bullet starts a new list item, opening the list first
	    # if necessary
	    if (!$in_list) {
		$in_list = 1;
		print "<ul>\n";
	    } else {
		print "</li>\n";
	    }
	    print "<li>\n";
	    $line = '<p>';
	    $endtag = '</p>';
      } elsif ($ptype eq 'indt') {
	  # An indented paragraph joins the current list item, or starts
	  # a list of its own when there is none
	  if (!$in_list) {
	      $in_list = 1;
	      print "<ul>\n";
	      print "<li class=\"indt\">\n"; # This is such a hack
	  }
	  $line = '<p>';
	  $endtag = '</p>';
      } elsif ($ptype eq 'bquo') {
	  $in_bquo = 1, print "<blockquote>\n" unless $in_bquo;
	  $line = '<p>';
	  $endtag = '</p>';
      } else {
        $line = '<p>';
        $endtag = '</p>';
      }
      # Emit the words, breaking output lines at spaces once a line
      # (markup included) grows past 75 characters.
      @a = @$pname;
      $wd = $wprev = '';
      do {
        do { $w = &word_html(shift @a) } while $w eq "\001"; # nasty hack
	$wd .= $wprev;
	if ($w eq ' ' || $w eq '' || $w eq undef) {
	  if (length ($line . $wd) > 75) {
	    $line =~ s/\s*$//; # trim trailing spaces
	    print "$line\n";
	    $line = '';
	    $wd =~ s/^\s*//; # trim leading spaces
	  }
	  $line .= $wd;
	  $wd = '';
	}
	$wprev = $w;
      } while ($w ne '' && $w ne undef);
      if ($line =~ /\S/) {
	$line =~ s/\s*$//; # trim trailing spaces
	print $line;
      }
      print $endtag, "\n";
    }
  }

  # Close whichever file was open.
  print "</pre>\n" if ($in_code);
  print "</li>\n</ul>\n" if ($in_list);
  print "</blockquote>\n" if ($in_bquo);
  html_postamble();

  print "\n   writing index file...";
  html_preamble('index');
  print "<h2>Index</h2>\n";
  &html_index;
  html_postamble();

}

# Write the body of the HTML index page to the selected output handle.
#
# Produces one <li> per index tag in @itags.  Each item holds a
# <div class="term"> with the entry's display text and a
# <div class="ref"> with comma-separated links to every node in @nodes
# that references the tag.  Output lines are broken at spaces once they
# pass 75 characters.
sub html_index {
  # All of these must be lexical; "my $a, $b" would declare only $a.
  my ($itag, $ientry, $sep, $w, $line);

  print "<ul>\n";

  # No chapter file is current, so word_html() always emits a file name
  # in its links.
  $chapternode = '';
  foreach $itag (@itags) {
    $ientry = $idxmap{$itag};
    # 'HDterm' / 'HDref' are section markers consumed by the loop below
    my @a = ('HDterm', @$ientry, 'HDref');
    $sep = 0;
    foreach my $node (@nodes) {
      next if !$idxnodes{$node,$itag};
      my $xn = $xrefnodes{$node};
      my $nn = $node;

      # Text like "chapter", "appendix", "section", etc in the index
      # makes it unnecessarily wide
      $nn =~ s/^.*\s+//g;       # Remove all but the actual index information

      push @a, 'n ,', 'sp' if $sep;
      push @a, "x $xn", "n $nn", "xe$xn";
      $sep = 1;
    }
    print "<li>\n";
    while (defined($w = shift(@a))) {
      die unless ($w =~ /^HD(.*)$/);
      print "<div class=\"$1\">\n";

      # Render words up to the next section marker or the end of list
      $line = '';
      while ($w ne '' && $a[0] !~ /^HD/) {
        $w = &word_html(shift @a);
        next if ($w eq "\001"); # Nasty hack

        if ($w =~ /^\s*$/ && length($line.$w) > 75) {
          $line =~ s/\s*$//; # trim trailing spaces
          print $line, "\n"; $line = '';
        }
        $line .= $w;
      }
      if ($line =~ /\S/) {
        $line =~ s/\s*$//; # trim trailing spaces
        print $line, "\n"; $line = '';
      }
      print "</div>\n";
    }
    print "</li>\n";
  }

  print "</ul>\n";
}

# Render a list of typed words as a single HTML string, leaving out the
# words that word_html() reports as invisible ("\001").
sub plist_to_html(@) {
    return join('',
		grep { $_ ne "\001" }
		map  { scalar(word_html($_)) } @_);
}

# Render one typed word (two-character type code followed by text) as
# HTML.
#
# Returns:
#   undef   for an empty or undefined word (end-of-paragraph marker)
#   "\001"  for index points, which have no visible output
#   the HTML fragment otherwise.  '&', '<' and '>' in the text are
#   escaped; web-link words ("w "/"wc", text of the form "<url>label")
#   are wrapped in an <a> element; "x "/"xe" open and close the link for
#   a resolved cross-reference.
# Dies on an unknown type code.
sub word_html($) {
  my ($w) = @_;
  # All of these must be lexical; "my $a, $b" would declare only $a.
  my ($wtype, $wmajt, $pfx, $sfx);

  return undef if !defined($w) || $w eq '';

  $wtype = substr($w,0,2);
  $wmajt = substr($wtype,0,1);
  $w = substr($w,2);
  $pfx = $sfx = '';
  $pfx = "<a href=\"$1\">", $sfx = "</a>", $w = $2
    if $wmajt eq "w" && $w =~ /^<(.*)>(.*)$/;
  $w =~ s/&/&amp;/g;
  $w =~ s/</&lt;/g;
  $w =~ s/>/&gt;/g;
  if ($wmajt eq "n" || $wtype eq "e " || $wtype eq "w ") {
    return $pfx . $w . $sfx;
  } elsif ($wtype eq "sp") {
    return ' ';
  } elsif ($wtype eq "da") {
    return '&ndash;';
  } elsif ($wtype eq "dm") {
    return '&mdash;';
  } elsif ($wmajt eq "c" || $wtype eq "wc") {
    return $pfx . "<code>${w}</code>" . $sfx;
  } elsif ($wtype eq "es") {
    return "<em>${w}";
  } elsif ($wtype eq "ee") {
    return "${w}</em>";
  } elsif ($wtype eq "eo") {
    return "<em>${w}</em>";
  } elsif ($wtype eq "x ") {
    # Magic: we must resolve the cross reference into file and marker
    # parts, then dispose of the file part if it's us, and dispose of
    # the marker part if the cross reference describes the top node of
    # another file.
    my $node = $nodexrefs{$w}; # find the node we're aiming at
    my $level = $tstruct_level{$node}; # and its level
    my $up = $node;
    my $uplev = $level - 1;
    # Climb to the top node of the containing file.  The "> 0" keeps the
    # loop finite when the target has no known level.
    $up = $tstruct_up{$up} while $uplev-- > 0;
    my $file = ($up ne $chapternode) ? html_filename($up) : "";
    my $marker = ($level == 1 and $file) ? "" : "#$w";
    return "<a href=\"$file$marker\">";
  } elsif ($wtype eq "xe") {
    return "</a>";
  } elsif ($wmajt eq "i") {
    return "\001";
  } else {
    die "$outfile: panic in word_html: $wtype$w\n";
  }
}

# Make tree structures. $tstruct_* is top-level and global.
#
# Append a node to the document tree.
#   $item  - node name (e.g. "Chapter 1", "Section 1.2")
#   $level - nesting level (1 = chapter/appendix, 2 = section, ...)
#   $para  - array ref that holds the heading's typed words
# Links the node after the previously added one (%tstruct_next /
# %tstruct_prev), below the most recent node one level up (%tstruct_up),
# and after the most recent node at its own level (%tstruct_mnext).
# The "most recent node" slots for all deeper levels are cleared.
sub add_item($$$) {
  my ($item, $level, $para) = @_;
  my $parent  = $tstruct_last[$level-1];
  my $sibling = $tstruct_last[$level];

  $tstruct_pname{$item} = $para;
  $tstruct_level{$item} = $level;

  # Reading-order chain
  $tstruct_next{$tstruct_previtem} = $item;
  $tstruct_prev{$item} = $tstruct_previtem;
  $tstruct_previtem = $item;

  # Hierarchy
  $tstruct_up{$item} = $parent;
  $tstruct_mnext{$sibling} = $item;
  $tstruct_last[$level] = $item;
  $tstruct_last[$_] = undef foreach ($level+1 .. $MAXLEVEL-1);

  push @nodes, $item;
}

#
# This produces documentation intermediate paragraph format; this is
# basically the digested output of the front end.  Intended for use
# by future backends, instead of putting it all in the same script.
#
# The file "nasmdoc.dip" in the output directory consists of two-line
# records:
#   "meta :<key>"    followed by the metadata value
#   <paragraph type> followed by the paragraph's words
#   "indx :<tag>"    followed by the index entry's words
# Words are separated by the \037 character.  Dies if the file cannot be
# created.
#
sub write_dip {
  my $dippath = File::Spec->catfile($out_path, 'nasmdoc.dip');
  open(PARAS, '>', $dippath)
    or die "$0: $dippath: $!\n";

  foreach my $key (sort(keys(%metadata))) {
      print PARAS 'meta :', $key, "\n";
      print PARAS $metadata{$key},"\n";
  }
  for (my $p = 0; $p <= $#pnames; $p++) {
      print PARAS $pflags[$p], "\n";
      # NOTE(review): the newline is part of the joined list here, so a
      # non-empty paragraph line ends with a \037 separator before the
      # newline, unlike the index lines below.  Left unchanged because
      # readers of this file may depend on it -- confirm before changing.
      print PARAS join("\037", @{$pnames[$p]}, "\n");
  }
  foreach my $tag (@itags) {
      print PARAS 'indx :', $tag, "\n";
      print PARAS join("\037", @{$idxmap{$tag}}), "\n";
  }
  close(PARAS);
}
